Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.
Here we read in the data and select a random half of it for exploration.
featFull <- fread("../data/synapsinR_7thA.tif.Pivots.txt.2011Features.txt",showProgress=FALSE)
### Setting a seed and creating an index vector
### to select half of the data
set.seed(2^10)
half1 <- sample(dim(featFull)[1],dim(featFull)[1]/2)
half2 <- setdiff(1:dim(featFull)[1],half1)
feat <- featFull[half1,]
dim(feat)# [1] 559649 144
## Setting the channel names
channel <- c('Synap_1','Synap_2','VGlut1_t1','VGlut1_t2','VGlut2','Vglut3',
'psd','glur2','nmdar1','nr2b','gad','VGAT',
'PV','Gephyr','GABAR1','GABABR','CR1','5HT1A',
'NOS','TH','VACht','Synapo','tubuli','DAPI')
## Setting the channel types
channel.type <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','in.pre.small',
'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
'in.pre','in.post','in.post','in.post','in.pre.small','other',
'ex.post','other','other','ex.post','none','none')
channel.type2 <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','other',
'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
'in.pre','in.post','in.post','in.post','other','other',
'ex.post','other','other','ex.post','other','other')
nchannel <- length(channel)
nfeat <- ncol(feat) / nchannel
## Createing factor variables for channel and channel type sorted properly
ffchannel <- (factor(channel.type,
levels= c("ex.pre","ex.post","in.pre","in.post","in.pre.small","other","none")
))
fchannel <- as.numeric(factor(channel.type,
levels= c("ex.pre","ex.post","in.pre","in.post","in.pre.small","other","none")
))
ford <- order(fchannel)
## Setting up colors for channel types
Syncol <- c("#197300","#5ed155","#660000","#cc0000","#ff9933","#0000cd","#ffd700")
Syncol3 <- c("#197300","#197300","#cc0000","#cc0000","#0000cd","#0000cd","#0000cd")
ccol <- Syncol[fchannel]
ccol3 <- Syncol3[fchannel]
exType <- factor(c(rep("ex",11),rep("in",6),rep("other",7)),ordered=TRUE)
exCol<-exType;levels(exCol) <- c("#197300","#990000","#0000cd");
exCol <- as.character(exCol)
fname <- as.vector(sapply(channel,function(x) paste0(x,paste0("F",0:5))))
names(feat) <- fname
fcol <- rep(ccol, each=6)
mycol <- colorpanel(100, "purple", "black", "green")
mycol2 <- matlab.like(nchannel)
mycol3 <- colorpanel(100, "blue","white","red")f <- lapply(1:6,function(x){seq(x,ncol(feat),by=nfeat)})
featF <- lapply(f,function(x){subset(feat,select=x)})
featF0 <- featF[[1]]
f01e3 <- 1e3*data.table(apply(X=featF0,2,function(x){((x-min(x))/(max(x)-min(x)))}))
fs <- f01e3
### Taking log_10 on data + 1.
log1f <- log10(featF0 + 1)
slog1f <- data.table(scale(log1f, center=TRUE,scale=TRUE))We now have the following data sets:
featF0: The feature vector looking only at the integrated brightness features.fs: The feature vector scaled between \([0,1000]\).logf1: The feature vector, plus one, then \(log_{10}\) is applied.slog1f: The feature vector, plus one, \(log_{10}\), then scaled by subtracting the mean and dividing by the sample standard deviation.corrf <- cor(featF[[1]])[ford,ford]
corrplot(corrf,method="color",tl.col=ccol[ford], tl.cex=0.8)#corLog <- cor(slog1f)bford <- order(rep(fchannel,each=6))
nord <- Reduce('c', f)
cr <- rep(ccol, each=6)
corrfB <- cor(feat)[bford,bford]
corrplot(corrfB,method="color",tl.col=cr[bford],tl.cex=0.75)## Energy Matrix: Distance Correlation T-testset.seed(317)
sam1 <- sample(dim(featF0)[1], 1e3)
tmp <- as.data.frame((featF0[sam1,]))
set.seed(331)
combcols <- t(combn(24,2))
dc <- foreach(i = 1:dim(combcols)[1]) %dopar% {
#dcov.test(x=tmp[,combcols[i,1]],y=tmp[,combcols[i,2]])
dcor.ttest(x=tmp[,combcols[i,1]],y=tmp[,combcols[i,2]])
}
ms <- matrix(as.numeric(0),24,24)
mp <- matrix(as.numeric(0),24,24)
for(i in 1:length(dc)){
ms[combcols[i,1],combcols[i,2]] <- dc[[i]]$statistic
ms[combcols[i,2],combcols[i,1]] <- dc[[i]]$statistic
mp[combcols[i,1],combcols[i,2]] <- dc[[i]]$p.val
mp[combcols[i,2],combcols[i,1]] <- dc[[i]]$p.val
}
rownames(ms) <- colnames(featF0)
rownames(mp) <- colnames(featF0)
colnames(ms) <- colnames(featF0)
colnames(mp) <- colnames(featF0)
diag(ms) <- as.numeric(0)
diag(mp) <- as.numeric(1)cl5 <- colorRampPalette(c("white", "blue"))
bl5 <- colorRampPalette(c("blue", "red"))
par(mfrow=c(1,2))
tmp <- as.numeric(table(fchannel))
corrplot(ms[ford,ford],is.corr=FALSE,method="color",tl.col=ccol[ford],
tl.cex=0.8, mar=c(0,0,1,0),oma=c(0,0,2,0))
title("Test statistic")
corrRect(tmp,col=Syncol,lwd=4)
corrplot((-log10(mp[ford,ford]+.Machine$double.eps)),is.corr=FALSE,method="color",tl.col=ccol[ford],
tl.cex=0.8,col=mycol3[-c(1:2)], mar=c(0,0,1,0),
p.mat=mp[ford,ford], sig.level=0.05)
title("p-values")
corrRect(tmp,col=Syncol,lwd=4);rm(tmp)The X’s in the above right figure denote a p-value greater than 0.05.
We will run PCA on the untransformed correlation matrix so the data can be viewed in 2-dimensions. The colors correspond to synapse type.
pca1 <- prcomp(corrf,center=TRUE, scale=TRUE)
pairs(pca1$x[,1:3], col=ccol3[ford],pch=20, cex=2)pca <- pca1$x
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s',col=ccol3[ford], size=1)
rgl::rgl.texts(pca[,1],pca[,2],pca[,3],abbreviate(rownames(pca)), col=ccol3[ford], adj=c(0,2))
subid <- currentSubscene3d()
rglwidget(elementId="rgl-pca1",width=720,height=720)pcaB <- prcomp(corrfB,center=TRUE, scale=TRUE)
pairs(pcaB$x[,1:3], col=cr[bford],pch=20, cex=2)plot(pcaB$x[,1:3], col=cr[bford],pch=20, cex=2)
text(pcaB$x[,1:2], labels=rownames(pcaB$x), pos=4, col=cr[bford])pcaB <- pcaB$x
rgl::plot3d(pcaB[,1],pcaB[,2],pcaB[,3],type='s',col=cr[bford], size=1)
rgl::rgl.texts(pcaB[,1],pcaB[,2],pcaB[,3],abbreviate(rownames(pcaB)), col=cr[bford], adj=c(0,2))
subid <- currentSubscene3d()
rglwidget(elementId="rgl-pcaB",width=720,height=720)d1 <- data.frame(type=exType,pca1$x)
#d1 <- data.frame(type=channel.type2[ford],pca1$x)
lda.fit <- lda(type ~ ., data=d1[,1:3])LDA was run using only the first 2 principal components from the untransformed correlation matrix.
voronoidf <- data.frame(x=lda.fit$means[,1],y=lda.fit$means[,2])
#This creates the voronoi line segments
plot(d1[,2:3], col=ccol3[ford], pch=20, cex=1.5)
text(d1[,2:3], labels=rownames(d1), pos=ifelse(d1[,2]<max(d1[,2] -0.5),4,2),
col=ccol3[ford], cex=1.2)
deldir(x = voronoidf$x,y = voronoidf$y, rw = c(-15,15,-15,15),
plotit=TRUE, add=TRUE, wl='te')
text(voronoidf, labels=rownames(voronoidf), pos=c(1,1,3))truth <- d1[,1]
pred <- predict(lda.fit)$class
table(truth, pred)# pred
# truth ex in other
# ex 9 0 2
# in 1 5 0
# other 0 0 7
(lda.err <- sum(as.numeric(truth) != as.numeric(pred))/length(truth))# [1] 0.125
The above gives the LDA error rate.